package com.hngy.scala

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Requirement: demonstrate the use of RDD checkpointing.
  */
object CheckPointOpScala {

  /**
    * Runs a word count over a text file on HDFS, marking the input RDD for
    * checkpointing, and writes the (word, count) pairs as text.
    *
    * Exits the JVM with status 100 when no arguments are given.
    *
    * @param args `args(0)` is the output path handed to `saveAsTextFile`;
    *             any further arguments are ignored
    */
  def main(args: Array[String]): Unit = {
    // Validate the arguments before creating the SparkContext so that a bad
    // invocation never starts a context that would then be left unstopped.
    if (args.isEmpty) {
      System.err.println("Usage: CheckPointOpScala <outputPath>")
      System.exit(100)
    }

    val outputPath = args(0)
    println(outputPath)

    val conf = new SparkConf()
      .setAppName("CheckPointOpScala")
      // Fall back to local mode only when no master has been supplied from
      // outside (e.g. via spark-submit --master); a hard-coded setMaster would
      // silently override it.
      .setIfMissing("spark.master", "local")
    val sc = new SparkContext(conf)

    try {
      // 1: set the checkpoint directory
      sc.setCheckpointDir("hdfs://hadoop001:9001/chk001")
      val dataRDD = sc.textFile("hdfs://hadoop001:9001/word.txt")

      // Persist before checkpointing: the Spark API docs recommend this,
      // otherwise writing the checkpoint requires recomputing the RDD.
      dataRDD.cache()

      // 2: mark the RDD for checkpointing; this must happen before any job
      // has been executed on it
      dataRDD.checkpoint()

      dataRDD
        .flatMap(_.split(" "))
        .map((_, 1))
        .reduceByKey(_ + _)
        .saveAsTextFile(outputPath)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
